package com.chance.cc.crawler.development.bootstrap.jiuxian;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.downloader.HttpConfig;
import com.chance.cc.crawler.core.downloader.proxy.Proxy;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRecord;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.development.controller.DevCrawlerController;
import org.apache.commons.lang3.StringUtils;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.filter;

/**
 * Development entry point that launches a local crawl of the "jiuxian" domain.
 *
 * <p>{@link #main(String[])} builds a seed {@link CrawlerRequestRecord} for the site's home page,
 * attaches serialized comment de-duplication filter info as a custom business tag, wires a
 * {@link DevCrawlerController} around it and starts the controller.
 *
 * @author lt
 * @version 1.0
 * @date 2021-01-22 13:42:41
 * @email okprog@sina.com
 */
public class JiuXianStart {

    /** Domain key reused for the request record, filter keys, request queue, trigger and job names. */
    private static final String DOMAIN = "jiuxian";

    /** Seed URL; it doubles as the record key of the start request. */
    private static final String START_URL = "https://www.jiuxian.com";

    /** Custom business-tag key under which the comment filter record is stored as JSON. */
    private static final String COMMENT_FILTER_TAG_KEY = "comment_record_filter_info";

    /**
     * Proxy configuration.
     *
     * <p>NOTE(review): this proxy is configured here but is not attached to the request's
     * {@link HttpConfig} anywhere in this class. The credentials are hard-coded in source;
     * they should be moved to external configuration (environment or properties) and rotated.
     */
    private static final Proxy proxy = new Proxy();

    static {
        proxy.setHost("http-dyn.abuyun.com");
        proxy.setPort(9020);
        proxy.setUsername("HL89Q19E86E2987D");
        proxy.setPassword("71F33D94CE5F7BF2");
    }

    /**
     * Builds the seed request, attaches the comment filter info, then creates and starts the
     * development crawler controller.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        CrawlerRequestRecord requestRecord = buildStartRequest();
        attachCommentFilterInfo(requestRecord);

        DevCrawlerController devCrawlerController = buildController(requestRecord);
        // Label the job's category tag with the "comment" data type (tag for collecting comments).
        devCrawlerController.getCrawlerJob()
                .getScheduleTags()
                .getCategoryTag()
                .addLabelTag(CrawlerEnum.CrawlerDataType.comment.enumVal());
        devCrawlerController.start();
    }

    /**
     * Creates the start-page request for the site home page, filtered by record key, marked as
     * needing parsing but not washing, and tagged with the domain.
     *
     * @return the seed request record
     */
    private static CrawlerRequestRecord buildStartRequest() {
        CrawlerRequestRecord startRequest = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, CrawlerEnum.CrawlerRequestType.turnPage)
                .httpUrl(START_URL)
                .recordKey(START_URL)
                .httpConfig(HttpConfig.me(DOMAIN))
                .releaseTime(System.currentTimeMillis())
                .filter(CrawlerEnum.CrawlerRecordFilter.key)
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(DOMAIN))
                .needParsed(true)
                .needWashed(false)
                .build();
        startRequest.tagsCreator().bizTags().addDomain(DOMAIN);
        // Optional custom tags (currently disabled): addCustomKV("wine", <liquor category>)
        // and addCustomKV("type", <flavor type>).
        return startRequest;
    }

    /**
     * Stores the comment de-duplication filter settings on the request as a JSON custom tag.
     *
     * <p>The filter record uses key-based filtering with a filter key built by joining the
     * {@code filter} request type, the domain and {@code "comment"} with {@code "-"}.
     *
     * @param requestRecord the request record that receives the custom tag
     */
    private static void attachCommentFilterInfo(CrawlerRequestRecord requestRecord) {
        String commentFilterKey = StringUtils.joinWith("-", filter, DOMAIN, "comment");

        CrawlerRecord commentFilterRecord = new CrawlerRecord();
        commentFilterRecord.setFilter(CrawlerEnum.CrawlerRecordFilter.key);
        commentFilterRecord.addFilterInfo(FilterUtils.memoryFilterKeyInfo(commentFilterKey));

        requestRecord.tagsCreator()
                .bizTags()
                .addCustomKV(COMMENT_FILTER_TAG_KEY, JSON.toJSONString(commentFilterRecord));
    }

    /**
     * Wires a development controller around the given seed request.
     *
     * @param requestRecord the seed request handed to the controller
     * @return the built, not yet started, controller
     */
    private static DevCrawlerController buildController(CrawlerRequestRecord requestRecord) {
        return DevCrawlerController.builder()
                .triggerInfo(DOMAIN, DOMAIN + "_trigger", System.currentTimeMillis(), DOMAIN + "_job")
                // In-memory request queue.
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN))
                // Console output; a file pipeline alternative is kept below for reference.
                .consoleResultPipeline()
//                .fileResultPipeline("D:\\chance\\data\\jiuxian\\jiuxian_test.json",false)
                .crawlerThreadNum(20)
                .requestRecord(requestRecord)
                .build();
    }
}
